
#PSRM Regression Nov 2025

sessionInfo() # package version

#> sessionInfo() # package version
#R version 4.3.3 (2024-02-29 ucrt)
#Platform: x86_64-w64-mingw32/x64 (64-bit)
#Running under: Windows 11 x64 (build 26200)


library(tseries)
library(vars)
library(nnet)
library(lmtest)
library(pscl)
library(MASS)
library(Zelig) # version Zelig_4.2-1 #use remote to install for the latest version of Zelig if not yet installed
library(broom)
library(boot)
library(fixest)
library(xts)
library(zoo)
library(lubridate)
library(dplyr)
library(tidyverse)
library(reshape)
library(reshape2)
library(ggplot2)
library(texreg)
library(extrafont)
library(xtable)


# Loading data ------------------------------------------------------------

getwd() 

tw <- readRDS("tw_nov_2025_regression.Rds")


icews_cn_twn_7sep <- read_csv("icews_cn_twn_7sep.csv") # ICEWS Data

# For Regression Table Formatting
var_latex = list("escalation.l1" = "Escalation (1-day Lag)",
                 "escalation.l2" = "Escalation (2-day Lag)",
                 "escalation.l3" = "Escalation (3-day Lag)",
                 "escalation.l4" = "Escalation (4-day Lag)",
                 "escalation.l5" = "Escalation (5-day Lag)",
                 "implicit_threat.l1" = "Implicit Threat (1-day Lag)",
                 "implicit_threat.l2" = "Implicit Threat (2-day Lag)",
                 "implicit_threat.l3" = "Implicit Threat (3-day Lag)",
                 "implicit_threat.l4" = "Implicit Threat (4-day Lag)",
                 "implicit_threat.l5" = "Implicit Threat (5-day Lag)",
                 "const" = "Constant",
                 "(Intercept)" = "(Intercept)",
                 "dplyr::lag(implicit_threat)" = "Implicit Threat Score (1-day Lag)",
                 "dplyr::lag(implicit_threat, 2)" = "Implicit Threat Score (2-day Lag)",
                 "dplyr::lag(implicit_threat, 3)" = "Implicit Threat Score (3-day Lag)",
                 "dplyr::lag(implicit_threat, 4)" = "Implicit Threat Score (4-day Lag)",
                 "dplyr::lag(implicit_threat, 5)" = "Implicit Threat Score (5-day Lag)",
                 "dplyr::lag(lss_score)" = "Implicit Threat Score (1-day Lag)",
                 "dplyr::lag(lss_score, 2)" = "Implicit Threat Score (2-day Lag)",
                 "dplyr::lag(lss_score, 3)" = "Implicit Threat Score (3-day Lag)",
                 "dplyr::lag(lss_score, 4)" = "Implicit Threat Score (4-day Lag)",
                 "dplyr::lag(lss_score, 5)" = "Implicit Threat Score (5-day Lag)",
                 "lag_escalation" = "Escalation (1-day Lag)",
                 "im_2da" = "Implicit Threat Score (2-day Rolling Avg)",
                 "im_3da" = "Implicit Threat Score (3-day Rolling Avg)",
                 "im_4da" = "Implicit Threat Score (4-day Rolling Avg)",
                 "im_5da" = "Implicit Threat Score (5-day Rolling Avg)",
                 "im_6da" = "Implicit Threat Score (6-day Rolling Avg)",
                 "im_7da" = "Implicit Threat Score (7-day Rolling Avg)",
                 "im_14da" = "Implicit Threat Score (14-day Rolling Avg)",
                 "lss_3da" = "Implicit Threat Score (3-day Rolling Avg)",
                 "lss_5da" = "Implicit Threat Score (5-day Rolling Avg)",
                 "lss_7da" = "Implicit Threat Score (7-day Rolling Avg)",
                 "lss_14da" = "Implicit Threat Score (14-day Rolling Avg)"
)



# Data Merge --------------------------------------------------------------

# People's Daily News Articles Data
lss_score = tw %>%
  group_by(date) %>%
  summarise_at(c("lss_score", "implicit_threat"), ~mean(.)) %>%
  mutate(lss_score = scales::rescale(lss_score))%>%
  mutate(implicit_threat =implicit_threat)

# data of ICEWS

attack = icews_cn_twn_7sep %>%
  pull(Event.Date) %>%
  unique()


# Lagging the data --------------------------------------------------------


dat1 = data.frame(date = seq(as.Date("2015-12-31"), as.Date("2022-12-31"), by = "day")) %>%
  left_join(lss_score, by = "date") %>%
  mutate_at(c("lss_score", "implicit_threat"), ~replace_na(., 0)) %>%
  mutate(lss_2da = rollmean(lss_score, 2, align = "right", fill = NA),
         lss_3da = rollmean(lss_score, 3, align = "right", fill = NA),
         lss_4da = rollmean(lss_score, 4, align = "right", fill = NA),
         lss_5da = rollmean(lss_score, 5, align = "right", fill = NA),
         lss_6da = rollmean(lss_score, 6, align = "right", fill = NA),
         lss_7da = rollmean(lss_score, 7, align = "right", fill = NA),
         lss_14da = rollmean(lss_score, 14, align = "right", fill = NA),
         im_2da = rollmean(implicit_threat, 2, align = "right", fill = NA),
         im_3da = rollmean(implicit_threat, 3, align = "right", fill = NA),
         im_4da = rollmean(implicit_threat, 4, align = "right", fill = NA),
         im_5da = rollmean(implicit_threat, 5, align = "right", fill = NA),
         im_6da = rollmean(implicit_threat, 6, align = "right", fill = NA),
         im_7da = rollmean(implicit_threat, 7, align = "right", fill = NA),
         escalation = as.numeric(date %in% attack),
         year = lubridate::year(date))



# create lag variables ----------------------------------------------------



dat1 <- dat1 %>%
  mutate(lag_implicit_threat = dplyr::lag(implicit_threat))
dat1 <- dat1 %>%
  mutate(lag_escalation = dplyr::lag(escalation))


# using 2016 as baseline for year fixed effects ---------------------------


dat1$year <- as.factor(dat1$year)
dat1$year <- relevel(dat1$year, ref = "2016")



# Main Text Table 3 p.12-------------------------------------------------------
model_logistic <- list(
  feglm(escalation ~ dplyr::lag(implicit_threat) + as.factor(year), dat1, "logit"),
  feglm(escalation ~ dplyr::lag(implicit_threat) + as.factor(year) + dplyr::lag(implicit_threat, 2), dat1, "logit"),
  feglm(escalation ~ dplyr::lag(implicit_threat) + as.factor(year) + dplyr::lag(implicit_threat, 2) + dplyr::lag(implicit_threat, 3), dat1, "logit"),
  feglm(escalation ~ dplyr::lag(implicit_threat) + as.factor(year) + dplyr::lag(implicit_threat, 2) + implicit_threat + dplyr::lag(implicit_threat, 3) + dplyr::lag(implicit_threat, 4) + implicit_threat + lag_escalation, dat1, "logit"),
  feglm(escalation ~ im_3da + as.factor(year), dat1, "logit"),
  feglm(escalation ~ im_5da + as.factor(year), dat1, "logit")
)

screenreg(
  model_logistic,
  omit = c("Implicit_threat", "year"),
  custom.coef.map = var_latex
)

texreg(
  model_logistic,
  omit = c("Implicit_threat", "year"),
  custom.coef.map = var_latex,
  file = "Main_Text_Table3.txt"
)



# DWT Test # Main text Analysis p. 11----------------------------------------------------------------
m1<-feglm(escalation ~  lag_implicit_threat+ as.factor(year) + dplyr::lag(implicit_threat, 2) + dplyr::lag(implicit_threat, 3) + dplyr::lag(implicit_threat, 4)+ lag_escalation, dat1, "logit")


# Calculate Durbin-Watson statistic manually
residuals_m1 <- residuals(m1, type = "response")
residual_diffs <- diff(residuals_m1)

# Numerator: Sum of squared differences of residuals
numerator <- sum(residual_diffs^2)
denominator <- sum(residuals_m1^2)
durbin_watson_stat <- numerator / denominator
fake_model <- lm(residuals_m1 ~ 1)

dw_result <- dwtest(fake_model)
cat("Durbin-Watson statistic (dwtest):", dw_result$statistic, "\n")
cat("p-value:", dw_result$p.value, "\n")


# Main text Figure 1 p. 11 -----------------------------------------------------------
predicted_probs <- predict(m1, type = "response")

threshold <- 0.5
predicted_classes <- ifelse(predicted_probs > threshold, 1, 0)

actual_classes <- dat1$escalation
valid_indices <- !is.na(actual_classes) & !is.na(predicted_probs)


residuals_m1 <- residuals(m1, type = "response")

newdata <- data.frame(lag_implicit_threat = seq(0, 2.3, length.out = 100))

boot_fn <- function(data, indices) {
  data_resampled <- data[indices, ]
  model <- tryCatch(
    glm(escalation ~ lag_implicit_threat, data = data_resampled, family = binomial),
    error = function(e) return(NULL)
  )
  if (is.null(model)) {
    return(rep(NA, nrow(newdata))) # Return NA for failed iterations
  }
  pred <- predict(model, newdata = newdata, type = "response")
  return(pred)
}


set.seed(100)  # For replication
bootstrap_results <- boot(data = dat1, statistic = boot_fn, R = 1000)


predicted_df <- map_df(1:nrow(newdata), function(i) {
  bootstrap_samples <- bootstrap_results$t[, i]  # Extract predictions for a specific `lag_implicit_threat`
  ci <- quantile(bootstrap_samples, probs = c(0.025, 0.5, 0.975), na.rm = TRUE)  # 95% CI
  data.frame(
    lag_implicit_threat = newdata$lag_implicit_threat[i],
    lower = ci[1],
    median = ci[2],
    upper = ci[3]
  )
})


dat1 <- dat1[!is.na(dat1$lag_implicit_threat), ]


# Predicted Probability Plot in the Main Text. ----------------------------


implicit_threat_plot<- ggplot(predicted_df, aes(x = lag_implicit_threat, y = median)) +
  geom_line(color = "blue", size = 1.5) +
  geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.2, fill = "blue") +
  geom_rug(
    data = dat1, aes(x = lag_implicit_threat),
    inherit.aes = FALSE,
    sides = "b",
    color = "orange", alpha = 0.8, size = 0.5
  ) +
  labs(
    x = "Implicit Threat Score (1-day Lag)",
    y = "Predicted Probability of Escalation",
    caption = "The orange rug plot shows the values of 1-day lagged implicit threat scores in the data"
  ) +
  theme_minimal()

ggsave("Main_text_Figure_1.png", plot = implicit_threat_plot, width = 8, height = 6, dpi = 300) # For replicability


# Appendix LSS Model Table 6 p.9 ----------------------------------------------

model_lss <- list(
  feglm(escalation ~ dplyr::lag(lss_score) + as.factor(year), data = dat1, family = "logit"),
  feglm(escalation ~ dplyr::lag(lss_score) + as.factor(year) + dplyr::lag(lss_score, 2), data = dat1, family = "logit"),
  feglm(escalation ~ dplyr::lag(lss_score) + as.factor(year) + dplyr::lag(lss_score, 2) + dplyr::lag(lss_score, 3), data = dat1, family = "logit"),
  feglm(escalation ~ dplyr::lag(lss_score) + as.factor(year) + dplyr::lag(lss_score, 2) + dplyr::lag(lss_score, 3) + dplyr::lag(lss_score, 4) + lag_escalation, data = dat1, family = "logit"),
  feglm(escalation ~ lss_3da + as.factor(year), data = dat1, family = "logit"),
  feglm(escalation ~ lss_5da + as.factor(year), data = dat1, family = "logit")
)


screenreg(model_lss, omit = c("implicit_threat", "year"), custom.coef.map = var_latex)
texreg(model_lss, omit = c("implicit_threat", "year"), custom.coef.map = var_latex, file = "Appendix_Table_6.txt")

# Appendix Rare Event Estimation Table 7  p.10---------------------------------------------------
# Zelig_4.2-1
model_relogit <- list(
  zelig(escalation ~ dplyr::lag(implicit_threat) + as.factor(year), data = dat1, model = "relogit"),
  zelig(escalation ~ dplyr::lag(implicit_threat) + as.factor(year) + dplyr::lag(implicit_threat, 2), data = dat1, model = "relogit"),
  zelig(escalation ~ dplyr::lag(implicit_threat) + as.factor(year) + dplyr::lag(implicit_threat, 2) + dplyr::lag(implicit_threat, 3), data = dat1, model = "relogit"),
  zelig(escalation ~ dplyr::lag(implicit_threat) + as.factor(year) + dplyr::lag(implicit_threat, 2) + dplyr::lag(implicit_threat, 3) + dplyr::lag(implicit_threat, 4) + lag_escalation, data = dat1, model = "relogit"),
  zelig(escalation ~ im_3da + as.factor(year), data = dat1, model = "relogit"),
  zelig(escalation ~ im_5da + as.factor(year), data = dat1, model = "relogit")
)



screenreg(model_relogit, omit = c("implicit_threat", "year"), custom.coef.map = var_latex)
texreg(model_relogit, omit = c("implicit_threat", "year"), custom.coef.map = var_latex, file = "Appendix_Table_7.txt") 

# for other version of Zelig
#model_relogit # check result

# Table generation
#res_list <- lapply(model_relogit, summary)

#sink("Appendix_Table_7.txt")
#for (i in seq_along(res_list)) {
#  cat("============================================================\n")
#  cat("Model", i, "\n")
#  cat("------------------------------------------------------------\n")
#  print(res_list[[i]])
#  cat("\n\n")
#}
#sink()

# Appendix OLS Model Table 8 p.11 -----------------------------------------------------


model_lm <- list(
  lm(escalation ~ dplyr::lag(implicit_threat) + as.factor(year), data = dat1),
  lm(escalation ~ dplyr::lag(implicit_threat) + as.factor(year) + dplyr::lag(implicit_threat, 2), data = dat1),
  lm(escalation ~ dplyr::lag(implicit_threat) + as.factor(year) + dplyr::lag(implicit_threat, 2) + dplyr::lag(implicit_threat, 3), data = dat1),
  lm(escalation ~ dplyr::lag(implicit_threat) + as.factor(year) + dplyr::lag(implicit_threat, 2) + dplyr::lag(implicit_threat, 3) + dplyr::lag(implicit_threat, 4) + lag_escalation, data = dat1),
  lm(escalation ~ im_3da + as.factor(year), data = dat1),
  lm(escalation ~ im_5da + as.factor(year), data = dat1)
)

screenreg(model_lm, omit = "year", custom.coef.map = var_latex)

texreg( model_lm, omit = "year",  custom.coef.map = var_latex,
        file = "Appendix_Table_8.txt")



# Appendix Removing 0 test Table 9 p.12 ---------------------------------------------------------

dat1_filtered <- dat1 %>%
  filter(im_2da != 0 & im_3da != 0 & im_4da != 0 & im_5da != 0& im_6da != 0&im_7da != 0)

model_im <- list(
  feglm(escalation ~ im_2da, data = dat1_filtered, family = "logit"),
  feglm(escalation ~ im_3da, data = dat1_filtered, family = "logit"),
  feglm(escalation ~ im_4da, data = dat1_filtered, family = "logit"),
  feglm(escalation ~ im_5da, data = dat1_filtered, family = "logit"),
  feglm(escalation ~ im_6da, data = dat1_filtered, family = "logit"),
  feglm(escalation ~ im_7da, data = dat1_filtered, family = "logit")
)

screenreg( model_im,  custom.coef.map = var_latex)
texreg( model_im, omit = "year",  custom.coef.map = var_latex,
        file = "Appendix_Table_9.txt")

# Appendix VAR  Table 10 p.13--------------------------------------------------------

data <- cbind(dat1$escalation, dat1$implicit_threat)
colnames(data) <- c("escalation", "implicit_threat")

# Check for Stationarity
adf_escalation <- adf.test(dat1$escalation, alternative = "stationary")
adf_implicit_threat <- adf.test(dat1$implicit_threat, alternative = "stationary")

# Differencing the Data if Non-Stationary
if (adf_escalation$p.value > 0.05 || adf_implicit_threat$p.value > 0.05) {
  data_diff <- diff(data, differences = 1)
  cat("Data differenced due to non-stationarity.\n")
} else {
  data_diff <- data
  cat("No differencing applied.\n")
}

# Lag Selection
lag_selection <- VARselect(data_diff, lag.max = 7, type = "const")
print(lag_selection$selection)
optimal_lag <- lag_selection$selection["AIC(n)"]

# Fit VAR Model
var_model <- VAR(data_diff, p = optimal_lag, type = "const")
summary(var_model)


model_var <- list(
  var_model$varresult$escalation,
  var_model$varresult$implicit_threat
)

screenreg(
  model_var,
  custom.model.names = c("Model 1 (Escalation as DV)", "Model 2 (Implicit Threat as DV)"),
  custom.coef.map = var_latex
)

texreg(
  model_var,
  custom.model.names = c("Model 1 (Escalation as DV)", "Model 2 (Implicit Threat as DV)"),
  custom.coef.map = var_latex,
  file = "Appendix_Table_10.txt"
)


# Appendix Descriptive Stat Table 5 p.7 ----------------------------------------

summary(dat1$implicit_threat)
summary(dat1$lss_score)
summary(dat1$escalation)
